Spread within variables


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%pylab inline
pd.__version__ # need 0.14.0 for multiindex slicing


Populating the interactive namespace from numpy and matplotlib
Out[2]:
'0.14.1'

Read files


In [3]:
# Both files hold one VALUE per row, keyed by K, M, STATISTIC (and VARIABLE for
# the per-variable file). Pivot them and keep only the K=10, M=200 entry.

# Overall statistics: one value per STATISTIC for the selected (K, M).
o = (
    pd.read_table("overall_statistics_ksmall.txt")
    .set_index(["K", "M", "STATISTIC"])["VALUE"]
    .unstack()              # STATISTIC becomes the columns
    .loc[(10, 200), :]
)

# Per-variable statistics: rows are VARIABLE, columns are STATISTIC.
v = (
    pd.read_table("variable_statistics_ksmall.txt")
    .set_index(["K", "M", "STATISTIC", "VARIABLE"])["VALUE"]
    .unstack()              # VARIABLE -> columns
    .unstack()              # STATISTIC -> second column level
    .loc[(10, 200), :]      # keyed by (VARIABLE, STATISTIC) after selection
    .unstack()              # STATISTIC back to columns, VARIABLE stays as index
)

In [4]:
# Statistic columns shown in the ranking tables further down.
statistics_of_interest = [
    "rms_error",
    "max_error",
    "precisionbits",
    "srr",
    "correlation",
]

Load variable information


In [5]:
# Variable metadata, used to join extra information (e.g. levels) onto the
# per-variable statistics. The file is read as records with at least the
# columns VARIABLE, INFO and VALUE; pivot so each INFO key becomes a column.
v_info = (
    pd.read_table("variable_information.txt")
    .set_index(["VARIABLE", "INFO"])
    .unstack()
    .loc[:, "VALUE"]
)

# Store "levels" as integers.
v_info["levels"] = v_info["levels"].astype("int")

# Blank out the columns' axis name.
v_info.columns.name = ""

Sort by RMS error


In [8]:
# Last five rows after ordering by RMS error (ascending by default, so these
# are the largest), labelled with each variable's descriptive name.
# NOTE(review): DataFrame.sort is the pandas 0.14-era API used throughout this
# notebook; later pandas releases replace it with sort_values.
(
    v.sort("rms_error")
    [["rms_error", "max_error", "precisionbits", "srr"]]
    .join(v_info["name"])
    .tail(5)
)


Out[8]:
rms_error max_error precisionbits srr name
VARIABLE
FREQS 0.002167 0.051397 3.28217 6.26081 Fractional occurance of snow
FREQZM 0.002256 0.022684 4.46221 6.60402 Fractional occurance of ZM convection
PSL 0.002299 0.024323 4.36156 6.29992 Sea level pressure
CLDMED 0.002491 0.030366 4.04140 6.07505 Vertically-integrated mid-level cloud
SSTSFMBL 0.002599 0.062668 2.99611 6.28976 Mobilization flux at surface

In [ ]:
# Last ten rows after ordering by RMS error, with all variable information
# columns joined on.
(
    v.sort("rms_error")
    [statistics_of_interest]
    .join(v_info)
    .tail(10)
)

Sort by maximum error


In [ ]:
# First ten rows after ordering by maximum error, with all variable
# information columns joined on.
(
    v.sort("max_error")
    [statistics_of_interest]
    .join(v_info)
    .head(10)
)

In [ ]:
# Last ten rows after ordering by maximum error, with all variable
# information columns joined on.
(
    v.sort("max_error")
    [statistics_of_interest]
    .join(v_info)
    .tail(10)
)

Ratio maximum error / RMS error


In [ ]:
# Ratio of maximum error to RMS error for each variable. The result is stored
# as a new "error_ratio" column on `v` itself, so later cells see it too.
# NOTE(review): an rms_error of exactly 0 is not guarded against here and
# would produce inf/NaN ratios — confirm that cannot occur in the data.
v["error_ratio"] = v.max_error / v.rms_error
# Pick out the ratio column along axis 1 and order it by value.
# NOTE(review): the one-element tuple key presumably yields a single-column
# frame (the column-name argument passed to .sort suggests so) — confirm on
# the pandas version in use.
er = v.loc(axis=1)[("error_ratio",)].sort("error_ratio")
# Displayed as one tuple: mean, median, then the first and last five rows of
# the ordered ratios.
(er.mean(), er.median(),er.head(5),er.tail(5))

In [ ]:
# Maximum and RMS error for four hand-picked variables, transposed so the
# variables become columns and the two statistics become rows.
(
    v.loc[
        ["U", "FSDSC", "Z3", "CCN3"],
        ["max_error", "rms_error"],
    ]
    .transpose()
)